package lexer
import (
"fmt"
"strings"
"unicode"
"unicode/utf8"
)
// Labels used as the Typ of the items produced by the lexer. Each value is a
// human-readable description of the token category.
//
// NOTE(review): only XItemError is declared with the XItemType type; every
// other name below is an untyped string constant (still assignable to
// XItemType). Confirm whether that is intentional before tightening the types.
const (
// XItemError labels an item whose Val is an error message (emitted by errorf).
XItemError XItemType = "Error"
// XItemAbsLocPath labels an absolute path.
XItemAbsLocPath = "Absolute path"
// XItemAbbrAbsLocPath labels an abbreviated absolute path.
XItemAbbrAbsLocPath = "Abbreviated absolute path"
// XItemAbbrRelLocPath labels an abbreviated relative path.
XItemAbbrRelLocPath = "Abbreviated relative path"
// XItemRelLocPath labels a relative path.
XItemRelLocPath = "Relative path"
// XItemEndPath labels the end-of-path instruction.
XItemEndPath = "End path instruction"
// XItemAxis labels an axis.
XItemAxis = "Axis"
// XItemAbbrAxis labels the abbreviated attribute axis.
XItemAbbrAxis = "Abbreviated attribute axis"
// XItemNCName carries the label "Namespace".
XItemNCName = "Namespace"
// XItemQName carries the label "Local name".
XItemQName = "Local name"
// XItemNodeType labels a node type.
XItemNodeType = "Node type"
// XItemProcLit labels a processing-instruction item.
XItemProcLit = "processing-instruction"
// XItemFunction labels a function.
XItemFunction = "function"
// XItemArgument labels a function argument.
XItemArgument = "function argument"
// XItemEndFunction labels the end of a function.
XItemEndFunction = "end of function"
// XItemPredicate labels a predicate.
XItemPredicate = "predicate"
// XItemEndPredicate labels the end of a predicate.
XItemEndPredicate = "end of predicate"
// XItemStrLit labels a string literal (emitted without its quotes by getStrLit).
XItemStrLit = "string literal"
// XItemNumLit labels a numeric literal (emitted by getNumLit).
XItemNumLit = "numeric literal"
// XItemOperator labels an operator (emitted by findOperatorState).
XItemOperator = "operator"
// XItemVariable labels a variable reference (emitted without the leading "$").
XItemVariable = "variable"
)
// eof is the sentinel value returned by the rune-reading helpers once the
// input is exhausted. It is negative so it can never collide with a real rune.
const eof = -1
// XItemType is the string label that identifies the category of an XItem.
type XItemType string
// XItem is a single token sent on the channel returned by Lex.
type XItem struct {
// Typ is the category label of the token.
Typ XItemType
// Val is the token text; for XItemError items it is the error message.
Val string
}
// stateFn is one state of the lexer's state machine: it works on the Lexer and
// returns the next state to run, or nil to stop the machine (see run).
type stateFn func (*Lexer ) stateFn
// Lexer holds the scanning state for one XPath expression.
type Lexer struct {
// input is the expression being scanned.
input string
// start is the byte offset where the pending (not yet emitted) token begins.
start int
// pos is the byte offset of the next rune to read.
pos int
// width is the byte width of the rune most recently returned by next
// (0 at end of input); backup subtracts it from pos.
width int
// items is the channel on which emitted tokens are delivered.
items chan XItem
}
// Lex starts scanning xpath in a new goroutine and returns the channel on
// which the resulting items are delivered.
//
// The channel is unbuffered, so the scanning goroutine blocks on every item
// until the caller receives it. The channel is closed once scanning ends.
func Lex(xpath string) chan XItem {
	lx := new(Lexer)
	lx.input = xpath
	lx.items = make(chan XItem)

	go lx.run()

	return lx.items
}
// run drives the state machine, beginning with startState, until a state
// returns nil. If unread input remains at that point an XItemError item with
// the message "Malformed XPath expression" is emitted. The items channel is
// closed before run returns.
func (l *Lexer) run() {
	var state stateFn = startState
	for state != nil {
		state = state(l)
	}

	if l.peek() != eof {
		l.errorf("Malformed XPath expression")
	}

	close(l.items)
}
// emit sends the pending text (input[start:pos]) as an item of type t and
// then moves start up to pos so the next token begins after it.
func (l *Lexer) emit(t XItemType) {
	pending := l.input[l.start:l.pos]
	l.items <- XItem{Typ: t, Val: pending}
	l.start = l.pos
}
// emitVal sends an item of type t whose value is val rather than the pending
// input text, and then discards the pending text by moving start up to pos.
func (l *Lexer) emitVal(t XItemType, val string) {
	item := XItem{Typ: t, Val: val}
	l.items <- item
	l.start = l.pos
}
// next consumes and returns the rune at pos, recording its byte width so that
// a following backup can undo the read. At end of input it returns eof and
// sets width to 0, which makes backup a no-op.
func (l *Lexer) next() (r rune) {
	if len(l.input) <= l.pos {
		l.width = 0
		return eof
	}

	decoded, size := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = size
	l.pos += size

	return decoded
}
// ignore discards the pending text by moving start up to pos; nothing is
// emitted for the skipped input.
func (l *Lexer ) ignore () {
l .start = l .pos
}
// backup steps pos back by the width recorded by the most recent next call.
// Since only one width is remembered, it undoes a single read; calling it
// again without an intervening next moves pos back by the same width again.
func (l *Lexer) backup() {
	l.pos = l.pos - l.width
}
// peek returns the rune at pos without consuming it (eof at end of input).
// It is implemented as next followed by backup, so it overwrites width.
func (l *Lexer) peek() rune {
	ahead := l.next()
	l.backup()

	return ahead
}
// peekAt returns the n-th rune ahead of pos (1-based) without consuming any
// input. Values of n below 2 behave like peek. If fewer than n runes remain,
// eof is returned.
//
// The end-of-input test is made before each decode: decoding an empty string
// yields (utf8.RuneError, 0), so testing the accumulated width afterwards can
// never detect that the input ran out.
func (l *Lexer) peekAt(n int) rune {
	if n <= 1 {
		return l.peek()
	}

	offset := l.pos
	var ret rune

	for count := 0; count < n; count++ {
		if offset >= len(l.input) {
			return eof
		}

		r, size := utf8.DecodeRuneInString(l.input[offset:])
		offset += size
		ret = r
	}

	return ret
}
// accept consumes the next rune if it occurs in valid and reports whether it
// did so. A rune outside valid (or eof) is left unread.
func (l *Lexer) accept(valid string) bool {
	if !strings.ContainsRune(valid, l.next()) {
		l.backup()
		return false
	}

	return true
}
// acceptRun consumes runes for as long as they occur in valid, leaving the
// first rune that does not (or eof) unread.
func (l *Lexer) acceptRun(valid string) {
	for {
		if !strings.ContainsRune(valid, l.next()) {
			break
		}
	}

	l.backup()
}
// skip reads num runes (fewer if the input ends first) and then discards all
// pending text, so nothing is emitted for it.
func (l *Lexer) skip(num int) {
	for remaining := num; remaining > 0; remaining-- {
		l.next()
	}

	l.ignore()
}
// skipWS advances pos past any run of Unicode whitespace. When ig is true the
// pending text (including the whitespace just read) is discarded as well;
// otherwise start is left untouched.
func (l *Lexer) skipWS(ig bool) {
	r := l.next()
	for r != eof && unicode.IsSpace(r) {
		r = l.next()
	}

	// Un-read the first non-space rune (a no-op at end of input).
	l.backup()

	if !ig {
		return
	}

	l.ignore()
}
// errorf formats a message with fmt.Sprintf, sends it as an XItemError item
// and returns nil, so a state can `return l.errorf(...)` to halt the machine.
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
	msg := fmt.Sprintf(format, args...)
	l.items <- XItem{Typ: XItemError, Val: msg}

	return nil
}
func isElemChar(r rune ) bool {
return string (r ) != ":" && string (r ) != "/" &&
(unicode .Is (first , r ) || unicode .Is (second , r ) || string (r ) == "*" ) &&
r != eof
}
// startState is the entry state of the lexer. After skipping whitespace it
// dispatches on what comes next, trying the alternatives in this order:
//
//   - "/" or "//": the slashes are dropped and the absolute or abbreviated
//     absolute location-path state is returned;
//   - a quote: a string literal is lexed by getStrLit;
//   - a numeric literal, lexed by getNumLit;
//   - "$": a variable reference, emitted without the "$";
//   - an operator, lexed by findOperatorState;
//   - a name: if it is followed by "(" (and holds at most one ":") it is handed
//     to procFunc as a function call, otherwise pos is rewound to start and
//     the relative location-path state is returned;
//   - "@": the relative location-path state is returned.
//
// The order matters because getNumLit and findOperatorState may consume input.
// The state returns itself while more input remains after a literal, variable
// or function, and nil when the input is exhausted or nothing matched.
//
// Errors from helpers are forwarded with an explicit "%s" format so that a "%"
// in the error text is not interpreted as a formatting verb.
func startState(l *Lexer) stateFn {
	l.skipWS(true)

	if l.peek() == '/' {
		l.next()
		l.ignore()

		if l.next() == '/' {
			l.ignore()
			return abbrAbsLocPathState
		}

		l.backup()
		return absLocPathState
	} else if q := l.peek(); q == '\'' || q == '"' {
		if err := getStrLit(l, XItemStrLit); err != nil {
			return l.errorf("%s", err.Error())
		}

		if l.peek() != eof {
			return startState
		}
	} else if getNumLit(l) {
		l.skipWS(true)
		if l.peek() != eof {
			return startState
		}
	} else if l.peek() == '$' {
		// Drop the "$" and collect the variable name.
		l.next()
		l.ignore()
		r := l.peek()
		for unicode.Is(first, r) || unicode.Is(second, r) {
			l.next()
			r = l.peek()
		}
		tok := l.input[l.start:l.pos]
		if len(tok) == 0 {
			return l.errorf("Empty variable name")
		}
		l.emit(XItemVariable)
		l.skipWS(true)
		if l.peek() != eof {
			return startState
		}
	} else if st := findOperatorState(l); st != nil {
		return st
	} else {
		if isElemChar(l.peek()) {
			// Scan ahead over a (possibly prefixed) name to see whether it
			// is a function call.
			colons := 0

			for {
				if isElemChar(l.peek()) {
					l.next()
				} else if l.peek() == ':' {
					l.next()
					colons++
				} else {
					break
				}
			}

			if l.peek() == '(' && colons <= 1 {
				tok := l.input[l.start:l.pos]
				err := procFunc(l, tok)
				if err != nil {
					return l.errorf("%s", err.Error())
				}

				l.skipWS(true)

				// A function result may be followed by a path step.
				if l.peek() == '/' {
					l.next()
					l.ignore()
					if l.next() == '/' {
						l.ignore()
						return abbrRelLocPathState
					}

					l.backup()
					return relLocPathState
				}

				return startState
			}

			// Not a function: rewind and let the path state lex the name.
			l.pos = l.start
			return relLocPathState
		} else if l.peek() == '@' {
			return relLocPathState
		}
	}

	return nil
}
// strPeek reports whether the unread input (from l.pos on) begins with str.
// No input is consumed. An empty str always matches.
//
// The comparison is a plain prefix test on the remaining input, which is
// correct for multi-byte text and avoids re-decoding the look-ahead from pos
// for every character of str.
func strPeek(str string, l *Lexer) bool {
	return strings.HasPrefix(l.input[l.pos:], str)
}
// findOperatorState tries to lex one operator at the current position (after
// skipping whitespace). On success the operator is emitted as XItemOperator
// and startState is returned; if no operator is present nil is returned and
// nothing is emitted.
//
// Recognised operators:
//   - ">", "<", "!" optionally followed by "=";
//   - "|", "+", "-", "*", "=";
//   - "(": the parenthesis is emitted, the state machine is run recursively
//     from startState until it stops, and a closing ")" is then required
//     (otherwise an error item is emitted and nil is returned);
//   - the keywords "and", "or", "mod" and "div".
//
// A keyword only counts as an operator when it is not immediately followed by
// a character that would continue a name (letter, digit, "_", "-", "." or
// ":"), so that names such as "order" or "android" are not split into an
// operator plus leftover text. A name that is exactly a keyword (for example
// an element called "div") is still reported as an operator here, because
// telling the two apart needs knowledge of the preceding token.
func findOperatorState(l *Lexer) stateFn {
	l.skipWS(true)

	switch l.peek() {
	case '>', '<', '!':
		l.next()
		if l.peek() == '=' {
			l.next()
		}
		l.emit(XItemOperator)
		return startState
	case '|', '+', '-', '*', '=':
		l.next()
		l.emit(XItemOperator)
		return startState
	case '(':
		l.next()
		l.emit(XItemOperator)
		// Lex the parenthesised sub-expression in place.
		for state := startState; state != nil; {
			state = state(l)
		}
		l.skipWS(true)
		if l.next() != ')' {
			return l.errorf("Missing end )")
		}
		l.emit(XItemOperator)
		return startState
	}

	rest := l.input[l.pos:]
	for _, kw := range [...]string{"and", "or", "mod", "div"} {
		if !strings.HasPrefix(rest, kw) {
			continue
		}

		// Reject the match when the keyword is merely the start of a longer
		// name. At end of input the decode yields utf8.RuneError, which is
		// none of the name characters below.
		after, _ := utf8.DecodeRuneInString(rest[len(kw):])
		if after == '_' || after == '-' || after == '.' || after == ':' ||
			unicode.IsLetter(after) || unicode.IsDigit(after) {
			continue
		}

		// The keywords are ASCII, so one next call per byte consumes them.
		for range kw {
			l.next()
		}
		l.emit(XItemOperator)
		return startState
	}

	return nil
}
// getStrLit lexes a quoted string. The rune at pos is taken as the quote
// character; everything up to the next occurrence of that rune is emitted as
// an item of type tok, without the quotes, and the closing quote is consumed
// and discarded. If the input ends before the closing quote an error is
// returned and nothing is emitted.
func getStrLit(l *Lexer, tok XItemType) error {
	quote := l.next()
	l.ignore() // drop the opening quote

	for ch := rune(0); ch != quote; {
		if ch = l.next(); ch == eof {
			return fmt.Errorf("Unexpected end of string literal.")
		}
	}

	// Un-read the closing quote so it is not part of the emitted value,
	// then consume and drop it.
	l.backup()
	l.emit(tok)
	l.next()
	l.ignore()

	return nil
}
// getNumLit tries to lex a numeric literal at pos: an optional "-", one or
// more decimal digits and an optional "." followed by further digits. On
// success the literal is emitted as XItemNumLit and true is returned.
//
// When no digits are found the function returns false and leaves pos exactly
// where it was. In particular a "-" that is not followed by a digit is put
// back, so the caller can still lex it as an operator instead of losing it.
func getNumLit(l *Lexer) bool {
	const dig = "0123456789"

	begin := l.pos
	l.accept("-")

	digits := l.pos
	l.acceptRun(dig)

	if l.pos == digits {
		// No digits: un-consume the sign (if any) before reporting failure.
		l.pos = begin
		return false
	}

	if l.accept(".") {
		l.acceptRun(dig)
	}

	l.emit(XItemNumLit)

	return true
}
// The pages are generated with Golds v0.6.7 (GOOS=linux GOARCH=amd64).
// Golds is a Go 101 project developed by Tapir Liu.
// PRs and bug reports are welcome and can be submitted to the issue list.
// Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds.